"""
Tests for PDF upload endpoint.

Tests cover:
- Authentication requirements
- File validation (size, count, type)
- Successful upload and text extraction
- Error handling for invalid files
"""

import io


class TestPDFUpload:
    """Integration tests for /api/upload/pdf endpoint."""

    def test_upload_requires_authentication(self, client):
        """Test that unauthenticated requests are rejected."""
        # Create a minimal PDF
        pdf_content = b"%PDF-1.4\ntest content"
        data = {"files": (io.BytesIO(pdf_content), "test.pdf")}

        response = client.post(
            "/api/upload/pdf",
            data=data,
            content_type="multipart/form-data",
        )

        # Should redirect to login or return 401/302
        assert response.status_code in [302, 401]

    def test_upload_no_files_returns_error(self, authenticated_client):
        """Test that requests without files return appropriate error."""
        response = authenticated_client.post(
            "/api/upload/pdf",
            data={},
            content_type="multipart/form-data",
        )

        assert response.status_code == 400
        data = response.get_json()
        assert "error" in data or "No files" in str(data)

    def test_upload_empty_file_list_returns_error(self, authenticated_client):
        """Test that empty file list returns appropriate error."""
        data = {"files": (io.BytesIO(b""), "")}

        response = authenticated_client.post(
            "/api/upload/pdf",
            data=data,
            content_type="multipart/form-data",
        )

        assert response.status_code == 400

    def test_upload_invalid_file_type_rejected(self, authenticated_client):
        """Test that non-PDF files are rejected."""
        # Create a text file masquerading as PDF
        txt_content = b"This is not a PDF file"
        data = {"files": (io.BytesIO(txt_content), "test.pdf")}

        response = authenticated_client.post(
            "/api/upload/pdf",
            data=data,
            content_type="multipart/form-data",
        )

        # Should fail validation
        data = response.get_json()
        # Either 400 with error or success with errors list
        if response.status_code == 200:
            assert len(data.get("errors", [])) > 0
        else:
            assert response.status_code == 400

    def test_upload_file_with_wrong_extension_rejected(
        self, authenticated_client
    ):
        """Test that files with wrong extension are rejected."""
        # Valid PDF content but wrong extension
        pdf_content = b"%PDF-1.4\n1 0 obj\n<<\n>>\nendobj\nxref\ntrailer\n"
        data = {"files": (io.BytesIO(pdf_content), "test.txt")}

        response = authenticated_client.post(
            "/api/upload/pdf",
            data=data,
            content_type="multipart/form-data",
        )

        data = response.get_json()
        # Should have errors due to extension
        if response.status_code == 200:
            assert len(data.get("errors", [])) > 0
        else:
            assert response.status_code == 400

    def test_upload_oversized_file_rejected(self, authenticated_client):
        """Test that files over size limit are rejected."""
        # Create file over 10MB limit
        large_content = b"%PDF-1.4\n" + (b"x" * (11 * 1024 * 1024))  # 11MB
        data = {"files": (io.BytesIO(large_content), "large.pdf")}

        response = authenticated_client.post(
            "/api/upload/pdf",
            data=data,
            content_type="multipart/form-data",
        )

        # Should be rejected
        data = response.get_json()
        if response.status_code == 200:
            # If 200, should have errors
            assert len(data.get("errors", [])) > 0
        else:
            assert response.status_code in [400, 413]

    def test_upload_too_many_files_rejected(self, authenticated_client):
        """Test that more than 100 files are rejected."""
        files = []
        # Create 101 minimal PDFs
        for i in range(101):
            pdf_content = b"%PDF-1.4\ntest"
            files.append(("files", (io.BytesIO(pdf_content), f"test_{i}.pdf")))

        response = authenticated_client.post(
            "/api/upload/pdf",
            data=files,
            content_type="multipart/form-data",
        )

        assert response.status_code == 400
        data = response.get_json()
        assert "error" in data

    def test_upload_malformed_pdf_handled(self, authenticated_client):
        """Test that malformed PDFs are handled gracefully."""
        # PDF magic bytes but invalid structure
        malformed_pdf = b"%PDF-1.4\nthis is not valid pdf structure"
        data = {"files": (io.BytesIO(malformed_pdf), "malformed.pdf")}

        response = authenticated_client.post(
            "/api/upload/pdf",
            data=data,
            content_type="multipart/form-data",
        )

        # Should not crash - either 200 with errors or 400
        data = response.get_json()
        assert response.status_code in [200, 400]
        if response.status_code == 200:
            # Should report errors for the malformed file
            assert (
                data.get("processed_files", 0) == 0
                or len(data.get("errors", [])) > 0
            )

    def test_upload_returns_correct_structure(self, authenticated_client):
        """Test that successful upload returns expected response structure."""
        # Note: This test may fail if pdfplumber can't parse minimal PDF
        # In that case, it should at least not crash
        pdf_content = b"%PDF-1.4\nminimal pdf"
        data = {"files": (io.BytesIO(pdf_content), "test.pdf")}

        response = authenticated_client.post(
            "/api/upload/pdf",
            data=data,
            content_type="multipart/form-data",
        )

        # Response should have expected keys
        data = response.get_json()
        assert "status" in data or "error" in data

    def test_upload_multiple_files(self, authenticated_client):
        """Test uploading multiple files at once."""
        files = []
        for i in range(3):
            pdf_content = b"%PDF-1.4\ntest " + str(i).encode()
            files.append(("files", (io.BytesIO(pdf_content), f"test_{i}.pdf")))

        response = authenticated_client.post(
            "/api/upload/pdf",
            data=files,
            content_type="multipart/form-data",
        )

        # Should process all files (may have errors for invalid PDFs)
        data = response.get_json()
        assert "status" in data or "error" in data


class TestPDFUploadSecurity:
    """Security-focused tests for PDF upload."""

    def test_path_traversal_in_filename_sanitized(self, authenticated_client):
        """Test that path traversal attempts in filenames are handled."""
        pdf_content = b"%PDF-1.4\ntest"
        malicious_filename = "../../../etc/passwd.pdf"
        data = {"files": (io.BytesIO(pdf_content), malicious_filename)}

        response = authenticated_client.post(
            "/api/upload/pdf",
            data=data,
            content_type="multipart/form-data",
        )

        # Should not crash and should sanitize filename
        assert response.status_code in [200, 400]
        if response.status_code == 200:
            data = response.get_json()
            # Check filename is sanitized in response
            for text in data.get("extracted_texts", []):
                assert "../" not in text.get("filename", "")

    def test_null_byte_in_filename_handled(self, authenticated_client):
        """Test that null bytes in filenames are handled."""
        pdf_content = b"%PDF-1.4\ntest"
        null_filename = "test\x00.exe.pdf"
        data = {"files": (io.BytesIO(pdf_content), null_filename)}

        response = authenticated_client.post(
            "/api/upload/pdf",
            data=data,
            content_type="multipart/form-data",
        )

        # Should handle gracefully
        assert response.status_code in [200, 400]

    def test_very_long_filename_handled(self, authenticated_client):
        """Test that very long filenames are handled."""
        pdf_content = b"%PDF-1.4\ntest"
        long_filename = "a" * 1000 + ".pdf"
        data = {"files": (io.BytesIO(pdf_content), long_filename)}

        response = authenticated_client.post(
            "/api/upload/pdf",
            data=data,
            content_type="multipart/form-data",
        )

        # Should handle gracefully
        assert response.status_code in [200, 400]
